InĀ [1]:
import prody as pr
import py3Dmol
import tempfile
import pandas as pd
from PlotUtils import *
from scipy.ndimage import binary_dilation
import matplotlib.patches as mpatches
InĀ [2]:
fold_pair = '2kkwA_2n0aD'
InĀ [3]:
plot_tool = PlotTool(folder='/Users/steveabecassis/Desktop/Pipeline', fold_pair=fold_pair)
@> 2016 atoms and 34 coordinate set(s) were parsed in 0.49s.
@> 2016 atoms and 1 coordinate set(s) were parsed in 0.05s.
InĀ [4]:
from IPython.display import display, HTML
from Bio import PDB
from Bio import pairwise2
from Bio.PDB import Superimposer
import py3Dmol
import ipywidgets as widgets
def get_seq_from_structure(structure):
return ''.join([PDB.Polypeptide.three_to_one(res.resname)
for res in structure.get_residues()
if PDB.Polypeptide.is_aa(res)])
def get_aligned_atoms(structure1, structure2):
seq1 = get_seq_from_structure(structure1)
seq2 = get_seq_from_structure(structure2)
alignment = pairwise2.align.globalxx(seq1, seq2)[0]
aligned_atoms1 = []
aligned_atoms2 = []
seq1_index = 0
seq2_index = 0
for a1, a2 in zip(alignment[0], alignment[1]):
if a1 != '-' and a2 != '-':
res1 = list(structure1.get_residues())[seq1_index]
res2 = list(structure2.get_residues())[seq2_index]
if 'CA' in res1 and 'CA' in res2:
aligned_atoms1.append(res1['CA'])
aligned_atoms2.append(res2['CA'])
if a1 != '-':
seq1_index += 1
if a2 != '-':
seq2_index += 1
return aligned_atoms1, aligned_atoms2
def toggle_visibility(change):
if change['name'] == 'value':
if change['owner'] == toggle1:
view.setStyle({'model': 0}, {'cartoon': {'color': 'red'}} if change['new'] else {'cartoon': {'hidden': True}})
elif change['owner'] == toggle2:
view.setStyle({'model': 1}, {'cartoon': {'color': 'blue'}} if change['new'] else {'cartoon': {'hidden': True}})
view.render()
def visualize_alignement_structure(PATH_PDB1,PATH_PDB2):
# Set up PDB parser
parser = PDB.PDBParser()
# Load structures
structure1 = parser.get_structure("protein1", PATH_PDB1)
structure2 = parser.get_structure("protein2", PATH_PDB2)
# Get aligned atoms
aligned_atoms1, aligned_atoms2 = get_aligned_atoms(structure1, structure2)
# Align structures
super_imposer = Superimposer()
super_imposer.set_atoms(aligned_atoms1, aligned_atoms2)
super_imposer.apply(structure2.get_atoms())
# Save aligned structure
io = PDB.PDBIO()
io.set_structure(structure2)
io.save("aligned_protein2.pdb")
# Create py3Dmol view
view = py3Dmol.view(width=800, height=600)
# Add structures to the view
with open(pdb_file2_, 'r') as f:
view.addModel(f.read(), 'pdb')
with open("aligned_protein2.pdb", 'r') as f:
view.addModel(f.read(), 'pdb')
# Set styles
view.setStyle({'model': 0}, {'cartoon': {'color': 'red'}})
view.setStyle({'model': 1}, {'cartoon': {'color': 'blue'}})
# Zoom to fit
view.zoomTo()
# Create toggle buttons
toggle1 = widgets.ToggleButton(description='Toggle Protein 1', value=True)
toggle2 = widgets.ToggleButton(description='Toggle Protein 2', value=True)
toggle1.observe(toggle_visibility)
toggle2.observe(toggle_visibility)
# Display the view and toggle buttons
display(HTML(view._make_html()))
display(widgets.HBox([toggle1, toggle2]))
from Bio.PDB import PDBParser
from Bio import PDB
from Bio import pairwise2
def extract_protein_sequence(pdb_file):
parser = PDBParser()
structure = parser.get_structure("protein_structure", pdb_file)
residue_sequence = ""
flag = 0
# Iterate through the structure and extract the residue sequence
for model in structure:
for chain in model:
if flag!=0:
break
flag +=1
for residue in chain:
if PDB.is_aa(residue):
residue_sequence += PDB.Polypeptide.three_to_one(residue.get_resname())
return residue_sequence
def get_align_indexes(seqA,seqB):
alignments = pairwise2.align.globalxx(seqA,seqB,one_alignment_only=True)
best_align = alignments[0]
seqA = best_align.seqA
seqB = best_align.seqB
cursA = 0
cursB = 0
seqA_idxs = []
seqB_idxs = []
for aa in range(len(seqA)):
if (seqA[aa] != '-')&(seqB[aa] != '-'):
seqA_idxs.append(cursA)
seqB_idxs.append(cursB)
cursA +=1
cursB +=1
if (seqA[aa] == '-')&(seqB[aa] != '-'):
cursB +=1
if (seqA[aa] != '-')&(seqB[aa] == '-'):
cursA +=1
return seqA_idxs,seqB_idxs
def dilate_with_tolerance(array, tolerance):
structure = np.ones((2 * tolerance + 1, 2 * tolerance + 1), dtype=int)
return binary_dilation(array, structure=structure).astype(int)
def load_pred_cmap(fileName):
cmap = np.load(f'{plot_tool.folder}/{plot_tool.fold_pair}/output_cmap_esm/{fileName}.npy')
cmap[cmap > 0.4] = 1
cmap[cmap <= 0.4] = 0
return cmap
seq_fold1 = extract_protein_sequence(f'{plot_tool.folder}/{plot_tool.fold_pair}/chain_pdb_files/{plot_tool.fold1}.pdb')
seq_fold2 = extract_protein_sequence(f'{plot_tool.folder}/{plot_tool.fold_pair}/chain_pdb_files/{plot_tool.fold2}.pdb')
fold1_idxs,fold2_idxs = get_align_indexes(seq_fold1, seq_fold2)
Original Structure Visualization¶
InĀ [5]:
pdb_file1 = read_pdb_file(f'{plot_tool.folder}/{plot_tool.fold_pair}/chain_pdb_files/{plot_tool.fold1}.pdb')
plot_tool.plot_single_fold(pdb_file1,label=plot_tool.fold1)
pdb_file2 = read_pdb_file(f'{plot_tool.folder}/{plot_tool.fold_pair}/chain_pdb_files/{plot_tool.fold2}.pdb')
plot_tool.plot_single_fold(pdb_file2,label=plot_tool.fold2)
3Dmol.js failed to load for some reason. Please check your browser console for error messages.
3Dmol.js failed to load for some reason. Please check your browser console for error messages.
InĀ [6]:
# plot_tool.plot_fold_alignement()
plot_tool.plot_fold_alignement_(f'{plot_tool.folder}/{plot_tool.fold_pair}/chain_pdb_files/{plot_tool.fold1}.pdb',f'{plot_tool.folder}/{plot_tool.fold_pair}/chain_pdb_files/{plot_tool.fold2}.pdb')
@> 2016 atoms and 34 coordinate set(s) were parsed in 0.48s.
@> 2016 atoms and 1 coordinate set(s) were parsed in 0.05s.
2n0aD_TEMP Blue:2kkwA Red:2n0aD
3Dmol.js failed to load for some reason. Please check your browser console for error messages.
Best AlphaFold Prediction¶
InĀ [7]:
df_af = pd.read_csv(f'{plot_tool.folder}/{fold_pair}/Analysis/df_af.csv')
df_af = df_af[df_af.cluster_num != 'Query'].iloc[:,1:-1]
df_af.head(15)
Out[7]:
| pdb_file | score_pdb1 | score_pdb2 | Fold | cluster_num | |
|---|---|---|---|---|---|
| 5 | ShallowMsa_000_unrelaxed_rank_001_alphafold2_p... | 0.25303 | 0.19187 | 2kkw | 000 |
| 6 | ShallowMsa_000_unrelaxed_rank_002_alphafold2_p... | 0.24393 | 0.17646 | 2kkw | 000 |
| 7 | ShallowMsa_000_unrelaxed_rank_003_alphafold2_p... | 0.19416 | 0.19883 | 2n0a | 000 |
| 8 | ShallowMsa_000_unrelaxed_rank_004_alphafold2_p... | 0.27511 | 0.18675 | 2kkw | 000 |
| 9 | ShallowMsa_000_unrelaxed_rank_005_alphafold2_p... | 0.23630 | 0.21021 | 2kkw | 000 |
| 10 | ShallowMsa_001_unrelaxed_rank_001_alphafold2_p... | 0.28257 | 0.17547 | 2kkw | 001 |
| 11 | ShallowMsa_001_unrelaxed_rank_002_alphafold2_p... | 0.33222 | 0.18081 | 2kkw | 001 |
| 12 | ShallowMsa_001_unrelaxed_rank_003_alphafold2_p... | 0.35912 | 0.19107 | 2kkw | 001 |
| 13 | ShallowMsa_001_unrelaxed_rank_004_alphafold2_p... | 0.29224 | 0.18236 | 2kkw | 001 |
| 14 | ShallowMsa_001_unrelaxed_rank_005_alphafold2_p... | 0.33187 | 0.20489 | 2kkw | 001 |
| 15 | ShallowMsa_002_unrelaxed_rank_001_alphafold2_p... | 0.35479 | 0.19327 | 2kkw | 002 |
| 16 | ShallowMsa_002_unrelaxed_rank_002_alphafold2_p... | 0.22847 | 0.15543 | 2kkw | 002 |
| 17 | ShallowMsa_002_unrelaxed_rank_003_alphafold2_p... | 0.27828 | 0.18678 | 2kkw | 002 |
| 18 | ShallowMsa_002_unrelaxed_rank_004_alphafold2_p... | 0.31590 | 0.21311 | 2kkw | 002 |
| 19 | ShallowMsa_002_unrelaxed_rank_005_alphafold2_p... | 0.26818 | 0.20419 | 2kkw | 002 |
InĀ [8]:
df_af[df_af.score_pdb1 > df_af.score_pdb2].head()
Out[8]:
| pdb_file | score_pdb1 | score_pdb2 | Fold | cluster_num | |
|---|---|---|---|---|---|
| 5 | ShallowMsa_000_unrelaxed_rank_001_alphafold2_p... | 0.25303 | 0.19187 | 2kkw | 000 |
| 6 | ShallowMsa_000_unrelaxed_rank_002_alphafold2_p... | 0.24393 | 0.17646 | 2kkw | 000 |
| 8 | ShallowMsa_000_unrelaxed_rank_004_alphafold2_p... | 0.27511 | 0.18675 | 2kkw | 000 |
| 9 | ShallowMsa_000_unrelaxed_rank_005_alphafold2_p... | 0.23630 | 0.21021 | 2kkw | 000 |
| 10 | ShallowMsa_001_unrelaxed_rank_001_alphafold2_p... | 0.28257 | 0.17547 | 2kkw | 001 |
InĀ [9]:
try:
max_af_pdb1 = df_af[df_af.score_pdb1 > df_af.score_pdb2].sort_values(by='score_pdb1',ascending=False).pdb_file.iloc[0]
max_af_pdb1 = f'{plot_tool.folder}/{fold_pair}/AF_preds/{max_af_pdb1}'
pdb_file1_ = f'{plot_tool.folder}/{plot_tool.fold_pair}/chain_pdb_files/{plot_tool.fold1}.pdb'
score = df_af[df_af.score_pdb1 > df_af.score_pdb2].sort_values(by='score_pdb1',ascending=False).score_pdb1.iloc[0]
plot_tool.align_and_visualize_pdb(max_af_pdb1,pdb_file1_,score)
except:
print('No max_af_pdb1')
@> 1012 atoms and 1 coordinate set(s) were parsed in 0.03s.
@> 2016 atoms and 34 coordinate set(s) were parsed in 0.46s.
Blue:ShallowMsa_008_unrelaxed_rank_001_alphafold2_ptm_model_2_seed_000 Red:2kkwA
3Dmol.js failed to load for some reason. Please check your browser console for error messages.
InĀ [10]:
# try:
# plot_tool.plot_single_fold(pdb_file1,label=plot_tool.fold1)
# max_af_pdb1 = df_af[df_af.score_pdb1 > df_af.score_pdb2].sort_values(by='score_pdb1',ascending=False).pdb_file.iloc[0]
# max_af_pdb1 = read_pdb_file(f'{plot_tool.folder}/{fold_pair}/AF_preds/{max_af_pdb1}')
# plot_tool.plot_single_fold(max_af_pdb1,label='MAX AF TM PDB1')
# except Exception as e:
# print(e)
InĀ [11]:
df_af[df_af.score_pdb1 < df_af.score_pdb2].head()
Out[11]:
| pdb_file | score_pdb1 | score_pdb2 | Fold | cluster_num | |
|---|---|---|---|---|---|
| 7 | ShallowMsa_000_unrelaxed_rank_003_alphafold2_p... | 0.19416 | 0.19883 | 2n0a | 000 |
| 26 | ShallowMsa_004_unrelaxed_rank_002_alphafold2_p... | 0.22112 | 0.22581 | 2n0a | 004 |
| 34 | ShallowMsa_005_unrelaxed_rank_005_alphafold2_p... | 0.19510 | 0.21105 | 2n0a | 005 |
| 81 | ShallowMsa_015_unrelaxed_rank_002_alphafold2_p... | 0.22389 | 0.24236 | 2n0a | 015 |
InĀ [12]:
# df_af[df_af.score_pdb2 > df_af.score_pdb1].sort_values(by='score_pdb2',ascending=False)
InĀ [13]:
import py3Dmol
from Bio import PDB, Align
from Bio.PDB import PDBParser, PDBIO
from Bio.PDB.Polypeptide import three_to_one
from io import StringIO
import numpy as np
def visualize_structure_alignment(pdb_file1, pdb_file2, chain1='A', chain2='A'):
# Parse PDB files
parser = PDBParser(QUIET=True)
structure1 = parser.get_structure("protein1", pdb_file1)
structure2 = parser.get_structure("protein2", pdb_file2)
# Extract sequences
seq1 = "".join([three_to_one(res.resname) for res in structure1[0][chain1] if res.id[0] == ' '])
seq2 = "".join([three_to_one(res.resname) for res in structure2[0][chain2] if res.id[0] == ' '])
# Perform sequence alignment
aligner = Align.PairwiseAligner()
alignments = aligner.align(seq1, seq2)
best_alignment = next(alignments)
# Get aligned positions
aligned_positions = [(i, j) for i, j in zip(best_alignment.path[0], best_alignment.path[1])
if i != -1 and j != -1]
# Extract aligned CA atoms
ca_atoms1 = [atom for atom in structure1[0][chain1].get_atoms() if atom.name == 'CA']
ca_atoms2 = [atom for atom in structure2[0][chain2].get_atoms() if atom.name == 'CA']
aligned_atoms1 = [ca_atoms1[i] for i, _ in aligned_positions]
aligned_atoms2 = [ca_atoms2[j] for _, j in aligned_positions]
# Superimpose structures
super_imposer = PDB.Superimposer()
super_imposer.set_atoms(aligned_atoms1, aligned_atoms2)
super_imposer.apply(structure2.get_atoms())
# Function to get PDB string
def get_pdb_string(structure):
io = StringIO()
pdb_io = PDBIO()
pdb_io.set_structure(structure)
pdb_io.save(io)
return io.getvalue()
# Create a py3Dmol view
view = py3Dmol.view(width=800, height=600)
# Add the first structure
view.addModel(get_pdb_string(structure1), 'pdb')
view.setStyle({'model': -1}, {'cartoon': {'color': 'blue'}})
# Add the second (aligned) structure
view.addModel(get_pdb_string(structure2), 'pdb')
view.setStyle({'model': -1}, {'cartoon': {'color': 'red'}})
# Zoom to fit the structures
view.zoomTo()
# Calculate RMSD
rmsd = super_imposer.rms
# Add RMSD and alignment info as labels
view.addLabel(f"RMSD: {rmsd:.2f} Ć
", {'position': {'x': -20, 'y': -20}, 'backgroundColor': 'white', 'fontColor': 'black'})
view.addLabel(f"Aligned: {len(aligned_positions)} of {len(seq1)} (blue) and {len(seq2)} (red) residues",
{'position': {'x': -20, 'y': -40}, 'backgroundColor': 'white', 'fontColor': 'black'})
return view
# Example usage:
# view = visualize_structure_alignment('protein1.pdb', 'protein2.pdb')
# view.show()
InĀ [14]:
try:
max_af_pdb2 = df_af[df_af.score_pdb2 > df_af.score_pdb1].sort_values(by='score_pdb2',ascending=False).pdb_file.iloc[0]
score = df_af[df_af.score_pdb2 > df_af.score_pdb1].sort_values(by='score_pdb2',ascending=False).score_pdb2.iloc[0]
max_af_pdb2_ = f'{plot_tool.folder}/{fold_pair}/AF_preds/{max_af_pdb2}'
pdb_file2_ = f'{plot_tool.folder}/{plot_tool.fold_pair}/chain_pdb_files/{plot_tool.fold2}.pdb'
print(f'red : {plot_tool.fold2}')
print(f'blue : {max_af_pdb2}')
# plot_tool.plot_fold_alignement_(pdb_file2_,max_af_pdb2_)
visualize_alignement_structure(pdb_file2_,max_af_pdb2_)
# plot_tool.align_and_visualize_pdb(max_af_pdb2_,pdb_file2_)
except Exception as e:
print(e)
red : 2n0aD blue : ShallowMsa_015_unrelaxed_rank_002_alphafold2_ptm_model_1_seed_000.pdb
3Dmol.js failed to load for some reason. Please check your browser console for error messages.
Best EsmFold Prediction¶
InĀ [15]:
df_esmfold_analysis = pd.read_csv(f'{plot_tool.folder}_res/df_esmfold_analysis.csv')
df_esmfold = df_esmfold_analysis[df_esmfold_analysis.fold_pair == fold_pair]
df_esmfold_analysis.head()
Out[15]:
| fold_pair | fold | TMscore_fold1 | TMscore_fold2 | cluster_num | TM_mean_cluster_pdb1 | TM_mean_cluster_pdb2 | sample_count | is_fold_1 | is_fold_2 | cluster_fold_1 | cluster_fold_2 | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 1dzlA_5keqF | ShallowMsa_000_1.pdb | 0.21478 | 0.22071 | 000 | 0.281451 | 0.2755 | 10 | 1 | 0 | 1 | 0 |
| 1 | 1dzlA_5keqF | ShallowMsa_000_0.pdb | 0.23730 | 0.24378 | 000 | 0.281451 | 0.2755 | 10 | 1 | 0 | 1 | 0 |
| 2 | 1dzlA_5keqF | ShallowMsa_000_2.pdb | 0.38172 | 0.36978 | 000 | 0.281451 | 0.2755 | 10 | 1 | 0 | 1 | 0 |
| 3 | 1dzlA_5keqF | ShallowMsa_000_3.pdb | 0.38407 | 0.38314 | 000 | 0.281451 | 0.2755 | 10 | 1 | 0 | 1 | 0 |
| 4 | 1dzlA_5keqF | ShallowMsa_000_7.pdb | 0.18823 | 0.17477 | 000 | 0.281451 | 0.2755 | 10 | 1 | 0 | 1 | 0 |
InĀ [16]:
try:
max_esm_pdb1 = df_esmfold[df_esmfold.TM_mean_cluster_pdb1 > df_esmfold.TM_mean_cluster_pdb2].sort_values(by='TMscore_fold1',ascending=False).fold.iloc[0]
score = df_esmfold[df_esmfold.TM_mean_cluster_pdb1 > df_esmfold.TM_mean_cluster_pdb2].sort_values(by='TMscore_fold1',ascending=False).TMscore_fold1.iloc[0]
# max_esm_pdb1_ = read_pdb_file(f'{plot_tool.folder}/{fold_pair}/output_esm_fold/{max_esm_pdb1}')
# plot_tool.plot_single_fold(max_esm_pdb1_,label='MAX ESM TM PDB1')
plot_tool.align_and_visualize_pdb(pdb_file1_,f'{plot_tool.folder}/{fold_pair}/output_esm_fold/{max_esm_pdb1}',score)
except Exception as e:
print(e)
single positional indexer is out-of-bounds
InĀ [17]:
try:
df_esmfold = df_esmfold[df_esmfold.fold.str.contains('ShallowMsa')]
if len(df_esmfold[df_esmfold.TM_mean_cluster_pdb1 < df_esmfold.TM_mean_cluster_pdb2])>0:
max_esm_pdb2 = df_esmfold[df_esmfold.TM_mean_cluster_pdb1 < df_esmfold.TM_mean_cluster_pdb2].sort_values(by='TMscore_fold2',ascending=False).fold.iloc[0]
score = df_esmfold[df_esmfold.TM_mean_cluster_pdb1 < df_esmfold.TM_mean_cluster_pdb2].sort_values(by='TMscore_fold2',ascending=False).TMscore_fold2.iloc[0]
print(f'red:{plot_tool.fold2}')
print(f'blue:{max_esm_pdb2}')
visualize_alignement_structure(f'{plot_tool.folder}/{plot_tool.fold_pair}/chain_pdb_files/{plot_tool.fold2}.pdb',f'{plot_tool.folder}/{fold_pair}/output_esm_fold/{max_esm_pdb2}')
except Exception as e:
print(e)
Contact maps¶
InĀ [18]:
try:
df_cmap = pd.read_csv(f'{plot_tool.folder}/{fold_pair}/Analysis/new_cmap_res.csv')
df_cmap['f1Score_1'] = (df_cmap['precision_1']*df_cmap['recall_1'])/(df_cmap['precision_1']+df_cmap['recall_1'])
df_cmap['f1Score_2'] = (df_cmap['precision_2']*df_cmap['recall_2'])/(df_cmap['precision_2']+df_cmap['recall_2'])
df_cmap.head()
except Exception as e:
print(e)
InĀ [19]:
try:
max_f1Score_pdb1 = df_cmap[df_cmap.f1Score_1 > df_cmap.f1Score_2].sort_values(by='f1Score_1',ascending=False).iloc[0]
except:
print('No max_f1Score_pdb1')
try:
max_f1Score_pdb2 = df_cmap[df_cmap.f1Score_1 < df_cmap.f1Score_2].sort_values(by='f1Score_2',ascending=False).iloc[0]
except:
print('No max_f1Score_pdb2')
try:
max_precision_pdb1 = df_cmap[df_cmap.precision_1 > df_cmap.precision_2].sort_values(by='precision_1',ascending=False).iloc[0]
except:
print('No max_precision_pdb1')
try:
max_precision_pdb2 = df_cmap[df_cmap.precision_1 < df_cmap.precision_2].sort_values(by='precision_2',ascending=False).iloc[0]
except:
print('No max_precision_pdb2')
try:
max_recall_pdb1 = df_cmap[df_cmap.recall_1 > df_cmap.recall_2].sort_values(by='recall_1',ascending=False).iloc[0]
except:
print('No max_recall_pdb1')
try:
max_recall_pdb2 = df_cmap[df_cmap.recall_1 < df_cmap.recall_2].sort_values(by='recall_2',ascending=False).iloc[0]
except:
print('No max_recall_pdb2')
No max_f1Score_pdb1 No max_f1Score_pdb2 No max_precision_pdb1 No max_precision_pdb2 No max_recall_pdb1 No max_recall_pdb2
InĀ [20]:
tolerance = 2
cmap_pdb1 = plot_tool.get_contact_map_from_pdb(f'{plot_tool.folder}/{plot_tool.fold_pair}/chain_pdb_files/{plot_tool.fold1}.pdb')[fold1_idxs][:, fold1_idxs]
cmap_pdb2 = plot_tool.get_contact_map_from_pdb(f'{plot_tool.folder}/{plot_tool.fold_pair}/chain_pdb_files/{plot_tool.fold2}.pdb')[fold2_idxs][:, fold2_idxs]
dilated_cmap1 = dilate_with_tolerance(cmap_pdb1, tolerance)
dilated_cmap2 = dilate_with_tolerance(cmap_pdb2, tolerance)
overlap_dilated = np.logical_and(dilated_cmap1, dilated_cmap2).astype(int)
try:
cmap_pred_f1_1 = load_pred_cmap(max_f1Score_pdb1.fileName)[fold1_idxs][:, fold1_idxs]
except:
print('No max_f1Score_pdb1')
try:
cmap_pred_f1_2 = load_pred_cmap(max_f1Score_pdb2.fileName)[fold1_idxs][:, fold1_idxs]
except:
print('No max_f1Score_pdb2')
try:
cmap_pred_precision_1 = load_pred_cmap(max_precision_pdb1.fileName)[fold1_idxs][:, fold1_idxs]
except:
print('No cmap_pred_precision_1')
try:
cmap_pred_precision_2 = load_pred_cmap(max_precision_pdb2.fileName)[fold1_idxs][:, fold1_idxs]
except:
print('No cmap_pred_precision_2')
try:
cmap_pred_recall_1 = load_pred_cmap(max_recall_pdb1.fileName)[fold1_idxs][:, fold1_idxs]
except:
print('No cmap_pred_recall_1')
try:
cmap_pred_recall_2 = load_pred_cmap(max_recall_pdb2.fileName)[fold1_idxs][:, fold1_idxs]
except:
print('No cmap_pred_recall_2')
only_fold1 = np.logical_and(cmap_pdb1, np.logical_not(overlap_dilated))
only_fold2 = np.logical_and(cmap_pdb2, np.logical_not(overlap_dilated))
No max_f1Score_pdb1 No max_f1Score_pdb2 No cmap_pred_precision_1 No cmap_pred_precision_2 No cmap_pred_recall_1 No cmap_pred_recall_2
InĀ [21]:
try:
plt.figure(figsize=(15, 8))# You can adjust the size as needed
try:
subplot(1,2,1)
upper_array = np.clip(np.multiply(dilate_with_tolerance(cmap_pred_f1_1, tolerance), only_fold1),0,1)
lower_array = np.clip(cmap_pdb1,0,1)
n = lower_array.shape[0]
upper_mask = np.tril(np.ones(n),k=0)[::-1]
lower_mask = np.logical_not(upper_mask)[::-1].astype(int)
overlap_lower = 0.75*np.multiply(dilated_cmap2,lower_mask).astype(int)
predicted_map = 0.75*np.multiply(cmap_pred_f1_1,upper_mask[::-1])
combined_array = np.clip(np.multiply(lower_array,lower_mask)+ 0.25*predicted_map - overlap_lower,0,1) + np.clip(np.multiply(upper_array,upper_mask[::-1]),0,1)
mappable1 = imshow(np.clip(combined_array,0,1),origin='lower', cmap='Blues')
plt.title(f'Orignal {plot_tool.fold2} Vs Best F1 score {round(max_f1Score_pdb2.f1Score_1,2)}')
except:
print(f'No plot for f1Score_1')
try:
subplot(1,2,2)
upper_array = np.clip(np.multiply(dilate_with_tolerance(cmap_pred_f1_2, tolerance), only_fold2),0,1)
lower_array = np.clip(cmap_pdb2,0,1)
n = lower_array.shape[0]
upper_mask = np.tril(np.ones(n),k=0)[::-1]
lower_mask = np.logical_not(upper_mask)[::-1].astype(int)
overlap_lower = 0.75*np.multiply(dilated_cmap1,lower_mask).astype(int)
predicted_map = 0.75*np.multiply(cmap_pred_f1_2,upper_mask[::-1])
combined_array = np.clip(np.multiply(lower_array,lower_mask)+ 0.25*predicted_map - overlap_lower,0,1) + np.clip(np.multiply(upper_array,upper_mask[::-1]),0,1)
mappable1 = imshow(np.clip(combined_array,0,1),origin='lower', cmap='Blues')
plt.title(f'Orignal {plot_tool.fold2} Vs Best F1 score {round(max_f1Score_pdb2.f1Score_2,2)}')
except:
print(f'No plot for f1Score_2')
white_patch = mpatches.Patch(color='lightsteelblue', label='Overlap')
plt.figlegend(handles=[white_patch], loc='lower center', bbox_to_anchor=(0.5, -0.05), ncol=2, title='Contact map legend')
except Exception as e:
print(e)
# print('max_f1Score_pdb1:')
# print(max_f1Score_pdb1)
# print('max_f1Score_pdb2:')
# print(max_f1Score_pdb2)
No plot for f1Score_1 No plot for f1Score_2
InĀ [22]:
try:
plt.figure(figsize=(15, 8)) # You can adjust the size as needed
subplot(1,2,1)
try:
upper_array = np.clip(np.multiply(dilate_with_tolerance(cmap_pred_precision_1, tolerance), only_fold1),0,1)
lower_array = np.clip(cmap_pdb1,0,1)
n = lower_array.shape[0]
upper_mask = np.tril(np.ones(n),k=0)[::-1]
lower_mask = np.logical_not(upper_mask)[::-1].astype(int)
overlap_lower = 0.75*np.multiply(dilated_cmap2,lower_mask).astype(int)
predicted_map = 0.75*np.multiply(cmap_pred_precision_1,upper_mask[::-1])
combined_array = np.clip(np.multiply(lower_array,lower_mask)+ 0.25*predicted_map - overlap_lower,0,1) + np.clip(np.multiply(upper_array,upper_mask[::-1]),0,1)
mappable1 = imshow(np.clip(combined_array,0,1),origin='lower', cmap='Blues')
plt.title(f'Orignal {plot_tool.fold2} Vs Best Precision score {round(max_precision_pdb1.precision_1,2)}')
except:
print(f'No precision_1')
try:
subplot(1,2,2)
upper_array = np.clip(np.multiply(dilate_with_tolerance(cmap_pred_precision_2, tolerance), only_fold2),0,1)
lower_array = np.clip(cmap_pdb2,0,1)
n = lower_array.shape[0]
upper_mask = np.tril(np.ones(n),k=0)[::-1]
lower_mask = np.logical_not(upper_mask)[::-1].astype(int)
overlap_lower = 0.75*np.multiply(dilated_cmap1,lower_mask).astype(int)
predicted_map = 0.75*np.multiply(cmap_pred_precision_2,upper_mask[::-1])
combined_array = np.clip(np.multiply(lower_array,lower_mask)+ 0.25*predicted_map - overlap_lower,0,1) + np.clip(np.multiply(upper_array,upper_mask[::-1]),0,1)
mappable1 = imshow(np.clip(combined_array,0,1),origin='lower', cmap='Blues')
plt.title(f'Orignal {plot_tool.fold2} Vs Best Precision score {round(max_precision_pdb2.precision_2,2)}')
except:
print(f'No precision_2')
white_patch = mpatches.Patch(color='lightsteelblue', label='Overlap')
plt.figlegend(handles=[white_patch], loc='lower center', bbox_to_anchor=(0.5, -0.05), ncol=2, title='Contact map legend')
except Exception as e:
print(e)
No precision_1 No precision_2
InĀ [23]:
import matplotlib.pyplot as plt
import numpy as np
import matplotlib.patches as mpatches
from IPython.display import display
try:
fig, axs = plt.subplots(1, 2, figsize=(15, 8)) # Create a figure and two subplots
# Plot for the first subplot
try:
upper_array = np.clip(np.multiply(dilate_with_tolerance(cmap_pred_recall_1, tolerance), only_fold1), 0, 1)
lower_array = np.clip(cmap_pdb1, 0, 1)
n = lower_array.shape[0]
upper_mask = np.tril(np.ones(n), k=0)[::-1]
lower_mask = np.logical_not(upper_mask)[::-1].astype(int)
overlap_lower = 0.75 * np.multiply(dilated_cmap2, lower_mask).astype(int)
predicted_map = 0.75 * np.multiply(cmap_pred_recall_1, upper_mask[::-1])
combined_array = np.clip(np.multiply(lower_array, lower_mask) + 0.25 * predicted_map - overlap_lower, 0, 1) + np.clip(np.multiply(upper_array, upper_mask[::-1]), 0, 1)
axs[0].imshow(np.clip(combined_array, 0, 1), origin='lower', cmap='Blues')
axs[0].set_title(f'Orignal {plot_tool.fold2} Vs Best Recall score {round(max_recall_pdb1.recall_1, 2)}')
except:
print('No recall_1')
try:
# Plot for the second subplot
upper_array = np.clip(np.multiply(dilate_with_tolerance(cmap_pred_recall_2, tolerance), only_fold2), 0, 1)
lower_array = np.clip(cmap_pdb2, 0, 1)
n = lower_array.shape[0]
upper_mask = np.tril(np.ones(n), k=0)[::-1]
lower_mask = np.logical_not(upper_mask)[::-1].astype(int)
overlap_lower = 0.75 * np.multiply(dilated_cmap1, lower_mask).astype(int)
predicted_map = 0.75 * np.multiply(cmap_pred_recall_2, upper_mask[::-1])
combined_array = np.clip(np.multiply(lower_array, lower_mask) + 0.25 * predicted_map - overlap_lower, 0, 1) + np.clip(np.multiply(upper_array, upper_mask[::-1]), 0, 1)
axs[1].imshow(np.clip(combined_array, 0, 1), origin='lower', cmap='Blues')
axs[1].set_title(f'Orignal {plot_tool.fold2} Vs Best Recall score {round(max_recall_pdb2.recall_2, 2)}')
except:
print('No recall_2')
white_patch = mpatches.Patch(color='lightsteelblue', label='Overlap')
# plt.figlegend(handles=[white_patch], loc='lower center', bbox_to_anchor=(0.5, -0.05), ncol=2, title='Contact map legend')
plt.show() # Ensure the plot is displayed
except Exception as e:
print(f"An error occurred: {e}")
No recall_1 No recall_2